package com.chance.cc.crawler.development.bootstrap.haoyisheng;

import com.alibaba.fastjson.JSON;
import com.chance.cc.crawler.core.CrawlerEnum;
import com.chance.cc.crawler.core.downloader.HttpConfig;
import com.chance.cc.crawler.core.downloader.proxy.Proxy;
import com.chance.cc.crawler.core.filter.FilterUtils;
import com.chance.cc.crawler.core.record.CrawlerRecord;
import com.chance.cc.crawler.core.record.CrawlerRequestRecord;
import com.chance.cc.crawler.development.controller.DevCrawlerController;
import org.apache.commons.lang3.StringUtils;

import static com.chance.cc.crawler.core.CrawlerEnum.CrawlerRecordFilter.key;
import static com.chance.cc.crawler.core.CrawlerEnum.CrawlerRequestType.*;

/**
 * Development bootstrap for the Haoyisheng ("Good Doctor") crawler.
 *
 * <p>Each entry point builds a {@link CrawlerRequestRecord} and hands it to a
 * {@link DevCrawlerController} that prints results to the console. The entry point is
 * selected by the first command-line argument, see {@link #main(String[])}.
 *
 * <p>Proxy credentials are not stored in source. They are read from the environment
 * variables {@code HAOYISHENG_PROXY_USERNAME} and {@code HAOYISHENG_PROXY_PASSWORD};
 * when a variable is absent the corresponding proxy field is left unset.
 *
 * @Author Zhao.Hhuan
 * @Date Create in 2021/7/5 10:51
 **/
public class HaoYiSheng {
    /** Crawler domain; also used as trigger info and as the dev request queue name. */
    private static final String DOMAIN = "haoyisheng";
    /** Site biz tag shared by every entry point in this class. */
    private static final String SITE = "searchKw";
    /** Site-biz biz tag shared by every entry point in this class. */
    private static final String SITE_BIZ = "news-realtime";

    /** Environment variable that supplies the proxy user name. */
    private static final String PROXY_USERNAME_ENV = "HAOYISHENG_PROXY_USERNAME";
    /** Environment variable that supplies the proxy password. */
    private static final String PROXY_PASSWORD_ENV = "HAOYISHENG_PROXY_PASSWORD";

    /**
     * Proxy configuration. No request built in this class attaches it; to use it, add
     * {@code .proxy(PROXY)} to the relevant request builder chain.
     */
    private static final Proxy PROXY = new Proxy();
    static {
        // Proxy configuration: endpoint is fixed, credentials come from the environment
        // so that secrets are never committed with the source.
        PROXY.setHost("http-dyn.abuyun.com");
        PROXY.setPort(9020);
        String proxyUsername = System.getenv(PROXY_USERNAME_ENV);
        if (proxyUsername != null) {
            PROXY.setUsername(proxyUsername);
        }
        String proxyPassword = System.getenv(PROXY_PASSWORD_ENV);
        if (proxyPassword != null) {
            PROXY.setPassword(proxyPassword);
        }
    }

    /**
     * Runs one of the bootstrap entry points.
     *
     * @param args optional; {@code args[0]} selects the entry point:
     *             {@code "keywordOne"} runs {@link #keywordOneRecord()},
     *             {@code "item"} runs {@link #itemRecord()},
     *             anything else (including no argument) runs {@link #keywordRecord()}.
     */
    public static void main(String[] args) {
        String mode = (args != null && args.length > 0) ? args[0] : null;
        if ("keywordOne".equals(mode)) {
            keywordOneRecord();
        } else if ("item".equals(mode)) {
            itemRecord();
        } else {
            // Default, and the fallback for unrecognised values, so that invocations
            // which passed (previously ignored) arguments keep running the keyword crawl.
            keywordRecord();
        }
    }

    /**
     * Starts a keyword search crawl: a start-page request for the search URL plus a
     * support record that targets the keyword metadata endpoint.
     */
    private static void keywordRecord(){
        String url = "http://haoyisheng.com/search";

        CrawlerRequestRecord crawlerRequestRecord = CrawlerRequestRecord.builder()
                .startPageRequest(DOMAIN, turnPage)
                .domain(DOMAIN)
                .httpUrl(url)
                .httpConfig(HttpConfig.me(DOMAIN))
                .filter(CrawlerEnum.CrawlerRecordFilter.dateRange)
                // NOTE(review): 24 * 7 looks like a 7-day window expressed in hours — confirm against FilterUtils.
                .addFilterInfo(FilterUtils.dateRangeFilterInfo(24 * 7,null))
                .releaseTime(System.currentTimeMillis())
                .needParsed(false)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.article)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.interaction)
                .build();
        // The start record has download disabled and skips the pipeline.
        // Presumably it only seeds the keyword-driven requests — TODO confirm.
        crawlerRequestRecord.setDownload(false);
        crawlerRequestRecord.setSkipPipeline(true);
        applyBizTags(crawlerRequestRecord);

        // URL template with a %s placeholder for the keyword.
        crawlerRequestRecord.getHttpRequest().addExtra("searchKwSourceUrl","http://haoyisheng.com/search?keyword=%s");

        // Support record pointing at the keyword metadata endpoint for this domain and site.
        CrawlerRequestRecord keywordRecord = CrawlerRequestRecord.builder()
                .startPageRequest("haoyisheng_keyword",turnPageItem)
                .httpUrl("http://192.168.1.217:9599/v1/meta/"+DOMAIN+"/keys?site="+SITE)
                .requestLabelTag(supportSource)
                .requestLabelTag(internalDownload)
                .build();

        DevCrawlerController.builder()
                .triggerInfo(DOMAIN,DOMAIN,System.currentTimeMillis(),DOMAIN)
                .crawlerRequestQueue(DevCrawlerController.devRequestQueue(DOMAIN))
                .consoleResultPipeline() // console output
                .requestRecord(crawlerRequestRecord)
                .supportRecord(keywordRecord)
                .build()
                .start();
    }

    /**
     * Starts a crawl for a single fixed keyword search URL, with a key-based comment
     * filter attached to the result tags.
     */
    private static void keywordOneRecord(){
        // NOTE(review): this URL targets api.appsdk.soku.com, not haoyisheng.com — looks
        // like a leftover from another bootstrap; confirm before relying on this entry point.
        String url = "http://api.appsdk.soku.com/i/s?_t_=1618741004&e=md5&_s_=779f531e8476961eae3378163c8abb67&keyword=nike&ob=1&pg=1";

        CrawlerRequestRecord crawlerRequestRecord = CrawlerRequestRecord.builder()
                .startPageRequest(DOMAIN, turnPage)
                .domain(DOMAIN)
                .httpUrl(url)
                .httpConfig(HttpConfig.me(DOMAIN))
                .filter(CrawlerEnum.CrawlerRecordFilter.dateRange)
                .addFilterInfo(FilterUtils.dateRangeFilterInfo(24 * 7,null))
                .releaseTime(System.currentTimeMillis())
                .resultLabelTag(CrawlerEnum.CrawlerDataType.article)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.interaction)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.comment)
                .build();
        applyBizTags(crawlerRequestRecord);
        crawlerRequestRecord.getHttpRequest().addExtra("keyword","耐克");

        // Comment filter keyed on "filter-<domain>-<site>-<siteBiz>-queue".
        CrawlerRecord commentFilter = new CrawlerRequestRecord();
        commentFilter.setFilter(key);
        commentFilter.addFilterInfo(FilterUtils.memoryFilterKeyInfo(StringUtils.joinWith("-","filter",DOMAIN,SITE,SITE_BIZ,"queue")));
        attachCommentFilter(crawlerRequestRecord, commentFilter);

        startCrawler(crawlerRequestRecord);
    }

    /**
     * Starts a crawl for a single fixed item (detail page) URL, with an unconfigured
     * comment filter record attached to the result tags.
     */
    private static void itemRecord(){
        // NOTE(review): this URL targets v.youku.com, not haoyisheng.com — looks like a
        // leftover from another bootstrap; confirm before relying on this entry point.
        String url = "https://v.youku.com/v_show/id_XNTEyNjk2MTI0NA==.html";

        CrawlerRequestRecord crawlerRequestRecord = CrawlerRequestRecord.builder()
                .startPageRequest(DOMAIN, turnPageItem)
                .domain(DOMAIN)
                .httpUrl(url)
                .httpConfig(HttpConfig.me(DOMAIN))
                .filter(CrawlerEnum.CrawlerRecordFilter.dateRange)
                .addFilterInfo(FilterUtils.dateRangeFilterInfo(24 * 7,null))
                .releaseTime(System.currentTimeMillis())
                .resultLabelTag(CrawlerEnum.CrawlerDataType.article)
                .resultLabelTag(CrawlerEnum.CrawlerDataType.interaction)
                .build();
        applyBizTags(crawlerRequestRecord);

        // No filter type or filter info is set on this record; it is serialized as constructed.
        CrawlerRecord commentFilter = new CrawlerRequestRecord();
        attachCommentFilter(crawlerRequestRecord, commentFilter);

        startCrawler(crawlerRequestRecord);
    }

    /** Adds the domain, site and site-biz biz tags shared by all entry points to {@code record}. */
    private static void applyBizTags(CrawlerRequestRecord record){
        record.tagsCreator().bizTags().addDomain(DOMAIN);
        record.tagsCreator().bizTags().addSite(SITE);
        record.tagsCreator().bizTags().addSiteBiz(SITE_BIZ);
    }

    /** Stores {@code commentFilter}, serialized as JSON, under the "comment_filter_record" category tag of {@code record}. */
    private static void attachCommentFilter(CrawlerRequestRecord record, CrawlerRecord commentFilter){
        record.tagsCreator().resultTags().getCategoryTag().addKVTag("comment_filter_record", JSON.toJSONString(commentFilter));
    }

    /** Builds a dev controller with console output for {@code requestRecord} and starts it. */
    private static void startCrawler(CrawlerRequestRecord requestRecord){
        DevCrawlerController.builder()
                .triggerInfo(DOMAIN,DOMAIN,System.currentTimeMillis(),DOMAIN)
                .crawlerRequestQueue(DevCrawlerController.devRequestQueue(DOMAIN))
                .consoleResultPipeline() // console output
                .requestRecord(requestRecord)
                .build()
                .start();
    }
}
